Load the data

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Read the bike-sharing CSV into a tibble; readr guesses the column types.
# NOTE(review): the path points at one user's Downloads folder -- adjust locally.
bike_sharing <- read_csv(file = "~/Downloads/bikesharing.csv")
## Rows: 731 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): season, month, weekday, weather
## dbl  (7): year, temperature_F, casual, registered, count, humidity, windspeed
## lgl  (2): holiday, workingday
## date (2): date, date_noyear
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column overview: quantiles for numeric/date columns, counts for logicals.
bike_sharing %>%
  summary()
##     season             month                year           date           
##  Length:731         Length:731         Min.   :2011   Min.   :2011-01-01  
##  Class :character   Class :character   1st Qu.:2011   1st Qu.:2011-07-02  
##  Mode  :character   Mode  :character   Median :2012   Median :2012-01-01  
##                                        Mean   :2012   Mean   :2012-01-01  
##                                        3rd Qu.:2012   3rd Qu.:2012-07-01  
##                                        Max.   :2012   Max.   :2012-12-31  
##   date_noyear          holiday          weekday          workingday     
##  Min.   :2000-01-01   Mode :logical   Length:731         Mode :logical  
##  1st Qu.:2000-04-01   FALSE:710       Class :character   FALSE:231      
##  Median :2000-07-02   TRUE :21        Mode  :character   TRUE :500      
##  Mean   :2000-07-01                                                     
##  3rd Qu.:2000-10-01                                                     
##  Max.   :2000-12-31                                                     
##    weather          temperature_F       casual         registered  
##  Length:731         Min.   :36.40   Min.   :   2.0   Min.   :  20  
##  Class :character   1st Qu.:56.85   1st Qu.: 315.5   1st Qu.:2497  
##  Mode  :character   Median :68.80   Median : 713.0   Median :3662  
##                     Mean   :68.56   Mean   : 848.2   Mean   :3656  
##                     3rd Qu.:80.35   3rd Qu.:1096.0   3rd Qu.:4776  
##                     Max.   :95.60   Max.   :3410.0   Max.   :6946  
##      count         humidity       windspeed    
##  Min.   :  22   Min.   : 0.00   Min.   : 1.50  
##  1st Qu.:3152   1st Qu.:52.00   1st Qu.: 9.00  
##  Median :4548   Median :62.70   Median :12.10  
##  Mean   :4504   Mean   :62.79   Mean   :12.76  
##  3rd Qu.:5956   3rd Qu.:73.00   3rd Qu.:15.60  
##  Max.   :8714   Max.   :97.30   Max.   :34.00

Weather across all data

# Count of records in each weather category, all seasons pooled.
ggplot(bike_sharing, aes(x = weather)) +
  geom_bar(fill = "dodgerblue")

Weather in the summer

# Same weather bar chart, restricted to rows whose season is "summer".
bike_sharing %>%
  filter(season == "summer") %>%
  ggplot(mapping = aes(x = weather)) +
  geom_bar(fill = "dodgerblue")

Vignette 1

# `count` plotted against each continuous weather measurement in turn.

# humidity vs. count
ggplot(bike_sharing, aes(x = humidity, y = count)) +
  geom_point(color = "dodgerblue")

# windspeed vs. count
ggplot(bike_sharing, aes(x = windspeed, y = count)) +
  geom_point(color = "dodgerblue")

# temperature_F vs. count
ggplot(bike_sharing, aes(x = temperature_F, y = count)) +
  geom_point(color = "dodgerblue")

# Temperature vs. count again, layering the categorical columns onto the points.

# Color encodes season.
ggplot(bike_sharing, aes(x = temperature_F, y = count, color = season)) +
  geom_point()

# Color encodes season, point shape encodes weather.
ggplot(
  bike_sharing,
  aes(x = temperature_F, y = count, color = season, shape = weather)
) +
  geom_point()

# Encodings swapped: color encodes weather, point shape encodes season.
ggplot(
  bike_sharing,
  aes(x = temperature_F, y = count, color = weather, shape = season)
) +
  geom_point()

# Add frac_casual (casual / count) and use it as an extra visual channel.

# Point size encodes frac_casual on top of the season/weather encodings.
bike_sharing %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(
    aes(
      x = temperature_F,
      y = count,
      color = season,
      shape = weather,
      size = frac_casual
    )
  ) +
  geom_point()

# Color alone encodes frac_casual (continuous color scale).
bike_sharing %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(x = temperature_F, y = count, color = frac_casual)) +
  geom_point()

# Labelled version of the weather-colored, season-shaped scatter plot.
ggplot(
  bike_sharing,
  aes(x = temperature_F, y = count, color = weather, shape = season)
) +
  geom_point() +
  labs(
    title = "Effect of weather conditions on number of riders",
    x = "temperature (F)",
    y = "number of riders"
  )

# The six summer rows with the smallest `count`, in ascending order.
bike_sharing %>%
  filter(season == "summer") %>%
  arrange(count) %>%
  slice_head(n = 6)
## # A tibble: 6 × 15
##   season month      year date       date_noyear holiday weekday  worki…¹ weather
##   <chr>  <chr>     <dbl> <date>     <date>      <lgl>   <chr>    <lgl>   <chr>  
## 1 summer August     2011 2011-08-27 2000-08-27  FALSE   Saturday FALSE   cloudy 
## 2 summer September  2011 2011-09-08 2000-09-08  FALSE   Thursday TRUE    rain   
## 3 summer September  2011 2011-09-07 2000-09-07  FALSE   Wednesd… TRUE    rain   
## 4 summer September  2011 2011-09-06 2000-09-06  FALSE   Tuesday  TRUE    rain   
## 5 summer July       2011 2011-07-23 2000-07-23  FALSE   Saturday FALSE   clear  
## 6 summer September  2011 2011-09-05 2000-09-05  TRUE    Monday   FALSE   cloudy 
## # … with 6 more variables: temperature_F <dbl>, casual <dbl>, registered <dbl>,
## #   count <dbl>, humidity <dbl>, windspeed <dbl>, and abbreviated variable name
## #   ¹​workingday
# The six fall rows with the smallest `count`, in ascending order.
bike_sharing %>%
  filter(season == "fall") %>%
  arrange(count) %>%
  slice_head(n = 6)
## # A tibble: 6 × 15
##   season month     year date       date_noyear holiday weekday   worki…¹ weather
##   <chr>  <chr>    <dbl> <date>     <date>      <lgl>   <chr>     <lgl>   <chr>  
## 1 fall   October   2012 2012-10-29 2000-10-29  FALSE   Monday    TRUE    rain   
## 2 fall   October   2011 2011-10-29 2000-10-29  FALSE   Saturday  FALSE   rain   
## 3 fall   December  2011 2011-12-07 2000-12-07  FALSE   Wednesday TRUE    rain   
## 4 fall   October   2012 2012-10-30 2000-10-30  FALSE   Tuesday   TRUE    cloudy 
## 5 fall   November  2011 2011-11-24 2000-11-24  TRUE    Thursday  FALSE   clear  
## 6 fall   November  2011 2011-11-22 2000-11-22  FALSE   Tuesday   TRUE    rain   
## # … with 6 more variables: temperature_F <dbl>, casual <dbl>, registered <dbl>,
## #   count <dbl>, humidity <dbl>, windspeed <dbl>, and abbreviated variable name
## #   ¹​workingday

Vignette 2

What affects the fraction of casual riders on a given day?

# frac_casual (casual / count) plotted against date.

# Every date in the data set.
bike_sharing %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(x = date, y = frac_casual)) +
  geom_point(color = "dodgerblue")

# Only rows from 2011.
bike_sharing %>%
  filter(year == 2011) %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(x = date, y = frac_casual)) +
  geom_point(color = "dodgerblue")

# 2011 frac_casual vs. date, colored by one candidate explanatory column
# per plot.

# Colored by season.
bike_sharing %>%
  filter(year == 2011) %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(x = date, y = frac_casual, color = season)) +
  geom_point()

# Colored by temperature_F.
bike_sharing %>%
  filter(year == 2011) %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(x = date, y = frac_casual, color = temperature_F)) +
  geom_point()

# Colored by humidity.
bike_sharing %>%
  filter(year == 2011) %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(x = date, y = frac_casual, color = humidity)) +
  geom_point()

# 2011 frac_casual vs. date, colored by the logical workingday flag.
bike_sharing %>%
  filter(year == 2011) %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(x = date, y = frac_casual, color = workingday)) +
  geom_point()

# 2011 frac_casual vs. date, colored by workingday and additionally flagging
# holidays. `holiday` is a logical (discrete) column, so it is mapped to point
# shape instead of size: ggplot2 warns that "Using size for a discrete
# variable is not advised", since size implies an ordering.
bike_sharing %>%
  filter(year == 2011) %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(date, frac_casual, color = workingday, shape = holiday)) +
  geom_point()

# Labelled version of the 2011 workingday-colored plot.
bike_sharing %>%
  filter(year == 2011) %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(x = date, y = frac_casual, color = workingday)) +
  geom_point() +
  labs(
    title = "Fraction of casual riders by date in 2011",
    y = "fraction of casual riders"
  )

# 2011 working days ranked from highest to lowest frac_casual (casual / count),
# keeping only the ratio and the date.
bike_sharing %>%
  filter(workingday, year == 2011) %>%
  mutate(frac_casual = casual / count) %>%
  select(frac_casual, date) %>%
  arrange(desc(frac_casual))
## # A tibble: 250 × 2
##    frac_casual date      
##          <dbl> <date>    
##  1       0.392 2011-11-25
##  2       0.273 2011-03-18
##  3       0.255 2011-04-11
##  4       0.238 2011-07-15
##  5       0.236 2011-04-04
##  6       0.232 2011-07-01
##  7       0.221 2011-07-05
##  8       0.217 2011-08-23
##  9       0.214 2011-08-12
## 10       0.210 2011-08-05
## # … with 240 more rows

Vignette 3

# Record counts per weather category.

# Single fill color.
ggplot(bike_sharing, aes(x = weather)) +
  geom_bar(fill = "dodgerblue")

# Bars split (stacked, the geom_bar default) by season via the fill aesthetic.
ggplot(bike_sharing, aes(x = weather, fill = season)) +
  geom_bar()

# Weather counts drawn in a separate panel for each season.
ggplot(bike_sharing, aes(x = weather, fill = season)) +
  geom_bar() +
  facet_wrap(~season)

# Side-by-side (dodged) bars for the weather/season cross-tabulation, drawn
# both ways round.

# Weather on the x axis, one bar per season within each weather category.
ggplot(bike_sharing, aes(x = weather, fill = season)) +
  geom_bar(position = "dodge") +
  labs(title = "weather by season")

# Season on the x axis, one bar per weather category within each season.
ggplot(bike_sharing, aes(x = season, fill = weather)) +
  geom_bar(position = "dodge") +
  labs(title = "Season by weather")